home *** CD-ROM | disk | FTP | other *** search
- /*
- * field.c - routines for dealing with fields and record parsing
- */
-
- /* Copyright © 1986, 1988, 1989, 1991 the Free Software Foundation, Inc.
- * This file is part of GAWK, the GNU implementation of the
- * AWK Programming Language, modified for the Macintosh (also called hAWK).
- * GAWK is free software; you can redistribute or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 1, or any later version.
- * GAWK is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- * You should have received a copy of the GNU General Public License
- * along with GAWK; see the file "COPYING hAWK". If not, write to
- * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
- * Modified for THINK C 4 on the Macintosh by Ken Earle (Dynabyte) Aug 1991.
- */
-
- #include "AWK.H"
-
- /* A generalised "carriage return" for the Mac version - its definition
- varies now and then, tracking down all the variations left as an exercise...*/
- #define CR '\n'
-
- extern void assoc_clear(NODE *symbol);
- extern short a_get_three(NODE *tree, NODE **res1, NODE **res2, NODE **res3);
- extern char get_rs(void);
-
- /*FIELD.C*/
- void init_fields(void);
- static void set_field(short num, char *str, short len, NODE *dummy);
- static void rebuild_record(void);
- void set_record(char *buf, short cnt);
- NODE **get_field(short num, short assign);
- static short parse_fields(short up_to, char **buf, short len,
- register char *fs, void (*set) (short, char *, short, NODE *), NODE *n);
- static short re_split(char *buf, short len, char *fs,
- struct re_registers *reregsp);
- NODE *do_split(NODE *tree);
- static char *get_fs(void);
- static void set_element(short num, char *s, short len, NODE *n);
-
- char *line_buf = NULL; /* holds current input line */
-
- static char *parse_extent; /* marks where to restart parse of record */
- static short parse_high_water=0; /* field number that we have parsed so far */
- static char f_empty[1] /*= ""*/;
- static char *save_fs /*= " "*/; /* save current value of FS when line is read,
- * to be used in deferred parsing
- */
-
-
- NODE **fields_arr; /* array of pointers to the field nodes */
- NODE node0; /* node for $0 which never gets free'd */
- short node0_valid = 1; /* $(>0) has not been changed yet */
-
- void init_fields()
- {
- emalloc(fields_arr, NODE **, sizeof(NODE *), "init_fields");
- node0.type = Node_val;
- node0.stref = 0;
- node0.stptr = "";
- node0.flags = (STR|PERM); /* never free buf */
- fields_arr[0] = &node0;
- }
-
- /*
- * Danger! Must only be called for fields we know have just been blanked, or
- * fields we know don't exist yet.
- */
- static short nf_high_water = 0;
- /*ARGSUSED*/
- static void set_field(short num, char *str, short len, NODE *dummy)
- /*NODE *dummy; not used -- just to make interface same as set_element */
- {
- NODE *n;
- short t;
-
-
- if (num > nf_high_water) {
- erealloc(fields_arr, NODE **, (num + 1) * sizeof(NODE *), "set_field");
- nf_high_water = num;
- }
- /* fill in fields that don't exist */
- for (t = parse_high_water + 1; t < num; t++)
- fields_arr[t] = Nnull_string;
- n = make_string(str, len);
- (void) force_number(n);
- fields_arr[num] = n;
- parse_high_water = num;
- }
-
- /* Someone assigned a value to $(something). Fix up $0 to be right */
- static void rebuild_record()
- {
- register short tlen;
- register NODE *tmp;
- NODE *ofs;
- char *ops;
- register char *cops;
- register NODE **ptr;
- register short ofslen;
-
- tlen = 0;
- ofs = force_string(OFS_node->var_value);
- ofslen = ofs->stlen;
- ptr = &fields_arr[parse_high_water];
- while (ptr > &fields_arr[0]) {
- tmp = force_string(*ptr);
- tlen += tmp->stlen;
- ptr--;
- }
- tlen += (parse_high_water - 1) * ofslen;
- emalloc(ops, char *, tlen + 1, "fix_fields");
- cops = ops;
- ops[0] = '\0';
- for (ptr = &fields_arr[1]; ptr <= &fields_arr[parse_high_water]; ptr++) {
- tmp = *ptr;
- if (tmp->stlen == 1)
- *cops++ = tmp->stptr[0];
- else if (tmp->stlen != 0) {
- memcpy(cops, tmp->stptr, tmp->stlen);
- cops += tmp->stlen;
- }
- if (ptr != &fields_arr[parse_high_water]) {
- if (ofslen == 1)
- *cops++ = ofs->stptr[0];
- else if (ofslen != 0) {
- memcpy(cops, ofs->stptr, ofslen);
- cops += ofslen;
- }
- }
- }
- tmp = make_string(ops, tlen);
- free(ops);
- deref = fields_arr[0];
- do_deref();
- fields_arr[0] = tmp;
- }
-
- /*
- * setup $0, but defer parsing rest of line until reference is made to $(>0)
- * or to NF. At that point, parse only as much as necessary.
- */
- void set_record(char *buf, short cnt)
- {
- register short i;
-
- assign_number(&NF_node->var_value, (AWKNUM)-1);
- for (i = 1; i <= parse_high_water; i++) {
- deref = fields_arr[i];
- do_deref();
- }
- parse_high_water = 0;
- node0_valid = 1;
- if (buf == line_buf) {
- deref = fields_arr[0];
- do_deref();
- save_fs = get_fs();
- node0.type = Node_val;
- node0.stptr = buf;
- node0.stlen = cnt;
- node0.stref = 1;
- node0.flags = (STR|PERM); /* never free buf */
- fields_arr[0] = &node0;
- }
- }
-
- NODE **get_field(short num, short assign)
- /*short assign; this field is on the LHS of an assign */
- {
- short n;
-
- /*
- * if requesting whole line but some other field has been altered,
- * then the whole line must be rebuilt
- */
- if (num == 0 && (node0_valid == 0 || assign)) {
- /* first, parse remainder of input record */
- if (NF_node->var_value->numbr == -1) {
- if (parse_high_water == 0)
- parse_extent = node0.stptr;
- n = parse_fields(HUGE-1, &parse_extent,
- node0.stlen - (parse_extent - node0.stptr),
- save_fs, set_field, (NODE *)NULL);
- assign_number(&NF_node->var_value, (AWKNUM)n);
- }
- if (node0_valid == 0)
- rebuild_record();
- return &fields_arr[0];
- }
- if (num > 0 && assign)
- node0_valid = 0;
- if (num <= parse_high_water) /* we have already parsed this field */
- return &fields_arr[num];
- if (parse_high_water == 0 && num > 0) /* starting at the beginning */
- parse_extent = fields_arr[0]->stptr;
- /*
- * parse up to num fields, calling set_field() for each, and saving
- * in parse_extent the point where the parse left off
- */
- n = parse_fields(num, &parse_extent,
- fields_arr[0]->stlen - (short)(parse_extent-fields_arr[0]->stptr),
- save_fs, set_field, (NODE *)NULL);
- if (num == HUGE-1)
- num = n;
- if (n < num) { /* requested field number beyond end of record;
- * set_field will just extend the number of fields,
- * with empty fields
- */
- set_field(num, f_empty, 0, (NODE *) NULL);
- /*
- * if this field is onthe LHS of an assignment, then we want to
- * set NF to this value, below
- */
- if (assign)
- n = num;
- }
- /*
- * if we reached the end of the record, set NF to the number of fields
- * so far. Note that num might actually refer to a field that
- * is beyond the end of the record, but we won't set NF to that value at
- * this point, since this is only a reference to the field and NF
- * only gets set if the field is assigned to -- in this case n has
- * been set to num above
- */
- if (*parse_extent == '\0')
- assign_number(&NF_node->var_value, (AWKNUM)n);
-
- return &fields_arr[num];
- }
-
- /*
- * this is called both from get_field() and from do_split()
- */
- static short parse_fields(short up_to, char **buf, short len,
- register char *fs, void (*set) (short, char *, short, NODE *), NODE *n)
- /*short up_to; parse only up to this field number */
- /*char **buf; on input: string to parse; on output: point to start next */
- /*void (*set) (); routine to set the value of the parsed field */
- {
- char *s = *buf;
- register char *field;
- register char *scan;
- register char *end = s + len;
- short NF = parse_high_water;
- char rs = get_rs();
-
-
- if (up_to == HUGE)
- NF = 0;
- if (*fs && *(fs + 1) != '\0') { /* fs is a regexp */
- struct re_registers reregs;
-
- /* TEST ONLY
- SysBeep(2);
- */
- scan = s;
- if (rs == 0 && STREQ(FS_node->var_value->stptr, " ")) {
- while ((*scan == CR || *scan == ' ' || *scan == '\t')
- && scan < end)
- scan++;
- }
- s = scan;
- while (scan < end
- && re_split(scan, (short)(end - scan), fs, &reregs) != -1
- && NF < up_to) {
- if (reregs.end[0] == 0) { /* null match */
- scan++;
- if (scan == end) {
- (*set)(++NF, s, (short)(scan - s), n);
- up_to = NF;
- break;
- }
- continue;
- }
- (*set)(++NF, s, (short)(scan - s) + reregs.start[0], n);
- scan += reregs.end[0];
- s = scan;
- }
- if (NF != up_to && scan <= end) {
- if (!(rs == 0 && scan == end)) {
- (*set)(++NF, scan, (short)(end - scan), n);
- scan = end;
- }
- }
- *buf = scan;
- return (NF);
- }
- for (scan = s; scan < end && NF < up_to; scan++) {
- /*
- * special case: fs is single space, strip leading
- * whitespace
- */
- if (*fs == ' ') {
- while ((*scan == ' ' || *scan == '\t') && scan < end)
- scan++;
- if (scan >= end)
- break;
- }
- field = scan;
- if (*fs == ' ')
- while (*scan != ' ' && *scan != '\t' && scan < end)
- scan++;
- else {
- while (*scan != *fs && scan < end)
- scan++;
- if (rs && scan == end-1 && *scan == *fs) {
- (*set)(++NF, field, (short)(scan - field), n);
- /* Mac note - cast to short just above added for THINK C,
- what the heck it works. Getting real annoyed by now at
- THINK C's charming little eccentricities concerning
- conversion between different species of integer...*/
- field = scan;
- }
- }
- (*set)(++NF, field, (short)(scan - field), n);
- if (scan == end)
- break;
- }
- *buf = scan;
- return NF;
- }
-
- typedef struct re_pattern_buffer RPAT;
- static RPAT *rp;
- static char *last_fs = NULL;
- static short re_split(char *buf, short len, char *fs,
- struct re_registers *reregsp)
- {
-
-
-
- if ((last_fs != NULL && !STREQ(fs, last_fs))
- || (rp && /* ! strict && */ ((IGNORECASE_node->var_value->numbr != 0)
- ^ (rp->translate != NULL))))
- {
- /* fs has changed or IGNORECASE has changed */
- free(rp->buffer);
- free(rp->fastmap);
- free((char *) rp);
- free(last_fs);
- last_fs = NULL;
- }
- if (last_fs == NULL) { /* first time */
- emalloc(rp, RPAT *, sizeof(RPAT), "re_split");
- memset((char *) rp, 0, sizeof(RPAT));
- emalloc(rp->buffer, char *, 16, "re_split");
- rp->allocated = 16;
- emalloc(rp->fastmap, char *, 256, "re_split");
- emalloc(last_fs, char *, strlen(fs) + 1, "re_split");
- (void) strcpy(last_fs, fs);
- if (/* ! strict && */ IGNORECASE_node->var_value->numbr != 0.0)
- rp->translate = casetable;
- else
- rp->translate = NULL;
- if (re_compile_pattern(fs, strlen(fs), rp) != NULL)
- fatal("illegal regular expression for FS: `%s'", fs);
- }
- return re_search(rp, buf, len, 0, len, reregsp);
- }
-
- NODE *do_split(NODE *tree)
- {
- NODE *t1, *t2, *t3;
- register char *splitc;
- char *s;
- NODE *n;
-
- if (a_get_three(tree, &t1, &t2, &t3) < 3)
- splitc = get_fs();
- else
- splitc = force_string(t3)->stptr;
-
- n = t2;
- if (t2->type == Node_param_list)
- n = stack_ptr[t2->param_cnt];
- if (n->type != Node_var && n->type != Node_var_array)
- fatal("second argument of split is not a variable");
- assoc_clear(n);
-
- tree = force_string(t1);
-
- s = tree->stptr;
- return tmp_number((AWKNUM)
- parse_fields(HUGE, &s, tree->stlen, splitc, set_element, n));
- }
-
- static char buf[10];
- static char *get_fs()
- {
- register NODE *tmp;
-
-
- tmp = force_string(FS_node->var_value);
- if (get_rs() == 0) {
- if (tmp->stlen == 1) {
- if (tmp->stptr[0] == ' ')
- (void) strcpy(buf, "[ \n]+");
- else
- sprintf(buf, "[%c\n]", tmp->stptr[0]);
- } else if (tmp->stlen == 0) {
- buf[0] = CR;
- buf[1] = '\0';
- } else
- return tmp->stptr;
- return buf;
- }
- return tmp->stptr;
- }
-
- static void set_element(short num, char *s, short len, NODE *n)
- {
- *assoc_lookup(n, tmp_number((AWKNUM) (num))) = make_string(s, len);
- }
-
- /* Some inits; hAWK is set up as a CODE resource, requiring that
- some initializations dealing with addresses be done rather awkwardly
- due to this referencing globals off a4 jazz. I may not have noted this
- elsewhere, but the inits for hAWK are sufficient that hAWK could be
- recast as a standalone application, just call all the init functions
- before each run as done at the top of AWKmain(). Just a thought. */
-
- extern void NullOut(char *str, long nBytes);
-
- void InitField(void);
-
- void InitField()
- {
- line_buf = NULL; /* holds current input line */
-
- parse_extent = NULL; /* marks where to restart parse of record */
- parse_high_water=0; /* field number that we have parsed so far */
- f_empty[0] = 0;
- save_fs = " "; /* save current value of FS when line is read,
- * to be used in deferred parsing
- */
- fields_arr = NULL; /* array of pointers to the field nodes */
- NullOut((Ptr)(&node0), sizeof(NODE));
- node0_valid = 1; /* $(>0) has not been changed yet */
- /* in set_field: */
- nf_high_water = 0;
- /* in re_split */
- rp = NULL;
- last_fs = NULL;
- /* in get_fs: */
- NullOut((Ptr)buf, sizeof(buf));
- }
-
- void SaveField(void);
- void RestoreField(void);
- void SaveField()
- {
- hs->line_buf = line_buf;
- hs->parse_extent = parse_extent;
- hs->parse_high_water = parse_high_water;
- hs->save_fs = save_fs;
- hs->node0 = node0;
- hs->nf_high_water = nf_high_water;
- hs->rp = (struct re_pattern_buffer*)rp;
- hs->last_fs = last_fs;
- BlockMove((Ptr)buf, (Ptr)(hs->buf), sizeof(buf));
- }
-
- void RestoreField()
- {
- line_buf = hs->line_buf;
- parse_extent = hs->parse_extent;
- parse_high_water = hs->parse_high_water;
- save_fs = hs->save_fs;
- node0 = hs->node0;
- nf_high_water = hs->nf_high_water;
- rp = (RPAT *)(hs->rp);
- last_fs = hs->last_fs;
- BlockMove((Ptr)(hs->buf), (Ptr)buf, sizeof(buf));
- }
-